from scrapy.spider import BaseSpider
from scrapy.selector import Selector

from spider.items import SpiderItem
import MySQLdb


class CarmodelSpider(BaseSpider):
    """Spider that scrapes car-model listings from car-style pages.

    The start URLs are read from the ``sp_ah_carstyle`` table while the class
    body executes. Each response is turned into a single ``SpiderItem``
    holding the model names, model URLs and the cleaned "mark" texts found
    inside the ``speclist20`` block of the page.
    """

    name = "carmtest"

    # Single source of truth for the database connection parameters.
    # NOTE(review): credentials are hard-coded in source; consider moving
    # them to the project settings or the environment.
    _DB_SETTINGS = dict(host="localhost", user="spider",
                        passwd="Xjk@4047218?", db="spider")

    # NOTE(review): this connection is left open, as before, in case code
    # outside this class reads ``CarmodelSpider.conn`` -- confirm and close
    # it here if nothing does.
    conn = MySQLdb.connect(**_DB_SETTINGS)
    cursor = conn.cursor()
    sql = "select url from sp_ah_carstyle order by id asc"
    try:
        result = cursor.execute(sql)
        carstyleUrlList = [row[0] for row in cursor.fetchall()]
    finally:
        # Release the cursor even when the query fails.
        cursor.close()

    start_urls = carstyleUrlList
    # Example of a hand-written list for a quick manual run:
    # start_urls = ['http://www.autohome.com.cn/18', 'http://www.autohome.com.cn/3415']

    def parse(self, response):
        """Build one item from a car-style page.

        :param response: the downloaded page for one car-style URL.
        :returns: a ``SpiderItem`` with ``carmodel``, ``carmodelurl``,
            ``mark``, ``type`` and ``autohomepid`` set; ``pid`` is set only
            when ``sp_ah_carstyle`` has a row whose ``url`` equals
            ``response.url``.
        """
        sel = Selector(response)
        area = sel.xpath('//div[@id="speclist20"]')
        itemx = SpiderItem()
        itemx["carmodel"] = area.xpath('.//div[@class="interval01-list-cars-infor"]//a[1]/text()').extract()
        itemx["carmodelurl"] = area.xpath('.//div[@class="interval01-list-cars-infor"]//a[1]/@href').extract()

        raw_marks = area.xpath('.//ul[@class="interval01-list"]//div[@class="interval01-list-guidance"]/div/text()').extract()
        # Build a new list instead of removing entries while iterating:
        # in-place removal skips the element after each removed one, so
        # consecutive blank entries used to survive as empty strings.
        stripped_marks = (mark.strip() for mark in raw_marks)
        itemx["mark"] = [mark for mark in stripped_marks if mark]

        conn = MySQLdb.connect(**self._DB_SETTINGS)
        try:
            cursor = conn.cursor()
            try:
                # Parameterized query: the URL is passed as data rather than
                # spliced into the SQL text, so quotes in it cannot break or
                # alter the statement.
                cursor.execute(
                    "select id from sp_ah_carstyle where url=%s",
                    (response.url,),
                )
                # If several rows match, the last one wins.
                for row in cursor.fetchall():
                    itemx["pid"] = row[0]
            finally:
                cursor.close()
        finally:
            # One connection is opened per response, so it must be closed
            # here or the server eventually runs out of connections.
            conn.close()

        itemx["type"] = "carmodel"
        # Second-to-last path segment of the URL.
        # NOTE(review): presumably response.url ends with "/" so that this
        # picks the numeric id -- confirm against real responses.
        itemx["autohomepid"] = response.url.split("/")[-2]

        return itemx

